from pathlib import Path
import time
import pandas
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Project directory of the original author's machine.
# NOTE(review): not referenced anywhere in the visible code — confirm before removing.
ROOT = '/home/burak/workspace/sefa/frequency_analysis'
# Minimum gap between two consecutive tweets for the gap to be counted by
# frequency_analysis(); shorter gaps are dropped from the histogram.
tweet_distance = timedelta(minutes=10)

def main():
    """Load the tweet CSV exports and run the frequency analysis on each subset.

    Three CSV files are read (all tweets, the refugee-filtered set and the
    nationalism-filtered set). Each of them is additionally split by three
    usernames, and ``frequency_analysis`` is called once per resulting frame,
    using the frame's label as the description.
    """
    # Column name -> dtype; insertion order doubles as the CSV column order.
    column_types = {
        'no': int,
        'id': int,
        'username': str,
        'time': str,
        'retweet_count': int,
        'reply_count': int,
        'like_count': int,
        'quote_count': int,
        'text': str,
    }
    column_names = list(column_types)

    # Label -> CSV location, in the order the files are read and analysed.
    csv_sources = {
        'tweets': '/Users/AyseEsra/Desktop/tweets.csv',
        'tweets_refugees': '/Users/AyseEsra/Desktop/FilteredTweets/Refugees/tweets - Refugees.csv',
        'tweets_nationalism': '/Users/AyseEsra/Desktop/FilteredTweets/Nationalism/tweets - Nationalism.csv',
    }

    # Label suffix -> value matched against the 'username' column.
    accounts = {
        'bahceli': 'dbdevletbahceli',
        'aksener': 'meral_aksener',
        'ozdag': 'umitozdag',
    }

    # The first row of each file is skipped and replaced by column_names.
    dataframes = {
        label: pandas.read_csv(
            path,
            encoding='unicode_escape',
            dtype=column_types,
            names=column_names,
            skiprows=1,
        )
        for label, path in csv_sources.items()
    }

    # Per-account subsets are appended after the three full frames, grouped
    # by source file (tweets_*, tweets_refugees_*, tweets_nationalism_*).
    per_account = {
        f'{label}_{suffix}': filter_dataframe(frame, ('username', handle))
        for label, frame in dataframes.items()
        for suffix, handle in accounts.items()
    }
    dataframes.update(per_account)

    for description, df in dataframes.items():
        frequency_analysis(df, description)

def frequency_analysis(df, description, high_limit=8, output_dir='result'):
    """Print and plot a histogram of the gaps (in whole days) between tweets.

    The 'time' values of ``df`` are parsed, sorted, and the difference between
    each pair of consecutive timestamps is taken. Gaps that are not longer
    than the module-level ``tweet_distance`` are ignored; the remaining gaps
    are counted by their whole-day component. All gaps of ``high_limit`` days
    or more are folded into a single last bin.

    Args:
        df: Table whose ``df['time']`` yields strings formatted as
            ``'%Y-%m-%d %H:%M:%S'`` (surrounding whitespace is stripped).
        description: Label printed to stdout, used as the plot title and as
            the output file name (``<output_dir>/<description>.jpg``).
        high_limit: Index of the overflow bin; gaps of this many days or more
            are summed into it.
        output_dir: Directory the JPEG is written to; created if missing.

    Raises:
        ValueError: If a time string does not match the expected format.
    """
    timestamps = sorted(
        datetime.strptime(raw.strip(), '%Y-%m-%d %H:%M:%S') for raw in df['time']
    )
    gaps = (later - earlier for earlier, later in zip(timestamps, timestamps[1:]))
    day_gaps = [gap.days for gap in gaps if gap > tweet_distance]

    # minlength=1 keeps the day-0 bin even when no gap qualifies (fewer than
    # two tweets, or every gap below the threshold), so the plot below always
    # has at least one stem to draw.
    histogram = np.bincount(np.array(day_gaps, dtype=np.intp), minlength=1)

    if len(histogram) > high_limit:
        overflow = histogram[high_limit:].sum()
        histogram = histogram[:high_limit + 1].copy()
        histogram[high_limit] = overflow

    print(f'\nDescription: {description}')
    print(f'Frequency:\n{str(histogram)}')

    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        ax.stem(np.arange(len(histogram)), histogram)
        # Annotate every stem with its count.
        for day, count in enumerate(histogram):
            ax.text(day, count, str(count), size=12)
        ax.grid(True)
        ax.set_xlabel('Days')
        ax.set_ylabel('Number of Tweets')
        ax.set_title(f'{description}')

        target_dir = Path(output_dir)
        target_dir.mkdir(parents=True, exist_ok=True)
        fig.savefig(target_dir / f'{description}.jpg')
    finally:
        # Release the figure; this function is called once per dataframe and
        # unclosed figures would otherwise accumulate in pyplot's registry.
        plt.close(fig)

def filter_dataframe(df, filter):
    """Return the rows of ``df`` whose column equals a given value.

    Args:
        df: Table supporting ``df[column]`` and boolean-mask indexing.
        filter: A ``(column, value)`` pair; rows are kept where
            ``df[column] == value``.

    Returns:
        The subset of ``df`` selected by the equality mask.
    """
    key, wanted = filter
    matches = df[key] == wanted
    return df[matches]

# Run the analysis only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
